iT邦幫忙

2025 iThome 鐵人賽

DAY 19
0
Rust

Rust 實戰專案集:30 個漸進式專案從工具到服務系列 第 19

JSON Schema 驗證器 - 驗證 JSON 資料格式

  • 分享至 

  • xImage
  •  

前言

JSON 是最常見的資料交換格式。
然而,確保 JSON 資料符合預期的結構和類型是一個重要的挑戰。
今天我們將實作一個 JSON Schema 驗證器,它可以根據預定義的 schema 來驗證 JSON 資料的正確性。

這次的學習目標

  • 理解 JSON Schema 的基本概念
  • 實作遞歸式的資料結構驗證
  • 處理複雜的巢狀 JSON 結構
  • 學習型別檢查和約束驗證
  • 提供清晰的錯誤訊息

開始建立專案

cargo new json_schema_validator
cd json_schema_validator

依賴

[dependencies]
# Derive macros (`Serialize` / `Deserialize`) for the schema types.
serde = { version = "1.0", features = ["derive"] }
# JSON parsing and the `Value` type that the validator inspects.
serde_json = "1.0"
# `#[derive(Error)]` for the validation error enum.
thiserror = "1.0"
# Regular-expression matching for the `pattern` keyword.
regex = "1.10"

定義錯誤類型

use thiserror::Error;

/// Every way a JSON value can fail validation against a schema.
///
/// The `#[error(...)]` attribute on each variant supplies its `Display` text.
#[derive(Error, Debug)]
pub enum ValidationError {
    /// The value's JSON type differs from the schema's `type`.
    #[error("Type mismatch: expected {expected}, got {got}")]
    TypeMismatch { expected: String, got: String },
    
    /// An object lacks a property listed in `required`; carries the property name.
    #[error("Required property missing: {0}")]
    MissingProperty(String),
    
    /// A number violates `minimum` (or `exclusiveMinimum`, which reuses this variant).
    #[error("Value {value} is less than minimum {minimum}")]
    BelowMinimum { value: f64, minimum: f64 },
    
    /// A number violates `maximum` (or `exclusiveMaximum`, which reuses this variant).
    #[error("Value {value} is greater than maximum {maximum}")]
    AboveMaximum { value: f64, maximum: f64 },
    
    /// A string is shorter than `minLength`.
    #[error("String length {length} is less than minLength {min_length}")]
    StringTooShort { length: usize, min_length: usize },
    
    /// A string is longer than `maxLength`.
    #[error("String length {length} is greater than maxLength {max_length}")]
    StringTooLong { length: usize, max_length: usize },
    
    /// An array has fewer elements than `minItems`.
    #[error("Array length {length} is less than minItems {min_items}")]
    ArrayTooShort { length: usize, min_items: usize },
    
    /// An array has more elements than `maxItems`.
    #[error("Array length {length} is greater than maxItems {max_items}")]
    ArrayTooLong { length: usize, max_items: usize },
    
    /// A string does not match the schema's `pattern` regular expression.
    #[error("Pattern mismatch: value does not match pattern {pattern}")]
    PatternMismatch { pattern: String },
    
    /// The value is not one of the schema's `enum` values.
    #[error("Enum mismatch: value not in allowed values")]
    EnumMismatch,
    
    /// An object contains a property the schema does not allow; carries the property name.
    #[error("Additional property not allowed: {0}")]
    AdditionalPropertyNotAllowed(String),
    
    /// Wraps another error together with the location (`path`) where it occurred.
    #[error("Validation error at {path}: {error}")]
    NestedError { path: String, error: Box<ValidationError> },
}

/// Result of a validation step: `Ok(())` on success, a [`ValidationError`] otherwise.
pub type ValidationResult = Result<(), ValidationError>;

定義 Schema 結構

use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;

/// The subset of JSON Schema keywords this validator understands.
///
/// Field names map to camelCase JSON keys (for example `min_length` is read
/// from `minLength`). Every keyword is optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Schema {
    /// Expected JSON type of the value (JSON key `type`).
    #[serde(rename = "type")]
    pub schema_type: Option<SchemaType>,
    
    /// Sub-schemas for object properties, keyed by property name.
    pub properties: Option<HashMap<String, Schema>>,
    /// Names of properties an object must contain.
    pub required: Option<Vec<String>>,
    /// Whether properties not declared in `properties` are permitted.
    /// Only the boolean form is representable here.
    pub additional_properties: Option<bool>,
    
    // Numeric constraints
    /// Inclusive lower bound.
    pub minimum: Option<f64>,
    /// Inclusive upper bound.
    pub maximum: Option<f64>,
    /// Exclusive lower bound (the value must be strictly greater).
    pub exclusive_minimum: Option<f64>,
    /// Exclusive upper bound (the value must be strictly smaller).
    pub exclusive_maximum: Option<f64>,
    
    // String constraints
    /// Minimum string length.
    pub min_length: Option<usize>,
    /// Maximum string length.
    pub max_length: Option<usize>,
    /// Regular expression the string must match.
    pub pattern: Option<String>,
    
    // Array constraints
    /// Schema applied to every array element.
    pub items: Option<Box<Schema>>,
    /// Minimum number of array elements.
    pub min_items: Option<usize>,
    /// Maximum number of array elements.
    pub max_items: Option<usize>,
    /// When `true`, array elements must be pairwise distinct.
    pub unique_items: Option<bool>,
    
    // Enumeration constraint
    /// Allowed values (JSON key `enum`).
    #[serde(rename = "enum")]
    pub enum_values: Option<Vec<Value>>,
}

/// JSON types that can appear in a schema's `type` keyword.
///
/// Variants are (de)serialized as their lowercase names, e.g. `"string"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum SchemaType {
    /// A JSON string.
    String,
    /// Any JSON number.
    Number,
    /// A JSON number holding an integer value.
    Integer,
    /// `true` or `false`.
    Boolean,
    /// A JSON object.
    Object,
    /// A JSON array.
    Array,
    /// The JSON `null` value.
    Null,
}

實作驗證

use regex::Regex;

/// Validates JSON values against a single root [`Schema`].
pub struct Validator {
    /// Root schema that `validate` checks data against.
    schema: Schema,
}

impl Validator {
    pub fn new(schema: Schema) -> Self {
        Self { schema }
    }
    
    pub fn from_str(schema_str: &str) -> Result<Self, serde_json::Error> {
        let schema: Schema = serde_json::from_str(schema_str)?;
        Ok(Self::new(schema))
    }
    
    pub fn validate(&self, data: &Value) -> ValidationResult {
        self.validate_with_schema(data, &self.schema, "")
    }
    
    fn validate_with_schema(
        &self,
        data: &Value,
        schema: &Schema,
        path: &str,
    ) -> ValidationResult {
        // 驗證型別
        if let Some(schema_type) = &schema.schema_type {
            self.validate_type(data, schema_type, path)?;
        }
        
        // 根據資料類型進行具體驗證
        match data {
            Value::Object(obj) => self.validate_object(obj, schema, path)?,
            Value::Array(arr) => self.validate_array(arr, schema, path)?,
            Value::String(s) => self.validate_string(s, schema, path)?,
            Value::Number(n) => self.validate_number(n, schema, path)?,
            _ => {}
        }
        
        // 驗證列舉
        if let Some(enum_values) = &schema.enum_values {
            if !enum_values.contains(data) {
                return Err(ValidationError::EnumMismatch);
            }
        }
        
        Ok(())
    }
    
    fn validate_type(
        &self,
        data: &Value,
        schema_type: &SchemaType,
        _path: &str,
    ) -> ValidationResult {
        let actual_type = match data {
            Value::Null => SchemaType::Null,
            Value::Bool(_) => SchemaType::Boolean,
            Value::Number(n) => {
                if n.is_i64() || n.is_u64() {
                    SchemaType::Integer
                } else {
                    SchemaType::Number
                }
            }
            Value::String(_) => SchemaType::String,
            Value::Array(_) => SchemaType::Array,
            Value::Object(_) => SchemaType::Object,
        };
        
        let type_matches = match schema_type {
            SchemaType::Number => {
                matches!(actual_type, SchemaType::Number | SchemaType::Integer)
            }
            _ => schema_type == &actual_type,
        };
        
        if !type_matches {
            return Err(ValidationError::TypeMismatch {
                expected: format!("{:?}", schema_type),
                got: format!("{:?}", actual_type),
            });
        }
        
        Ok(())
    }
    
    fn validate_object(
        &self,
        obj: &serde_json::Map<String, Value>,
        schema: &Schema,
        path: &str,
    ) -> ValidationResult {
        // 驗證必要屬性
        if let Some(required) = &schema.required {
            for prop in required {
                if !obj.contains_key(prop) {
                    return Err(ValidationError::MissingProperty(prop.clone()));
                }
            }
        }
        
        // 驗證屬性
        if let Some(properties) = &schema.properties {
            for (key, value) in obj {
                if let Some(prop_schema) = properties.get(key) {
                    let new_path = if path.is_empty() {
                        key.clone()
                    } else {
                        format!("{}.{}", path, key)
                    };
                    
                    self.validate_with_schema(value, prop_schema, &new_path)
                        .map_err(|e| ValidationError::NestedError {
                            path: new_path,
                            error: Box::new(e),
                        })?;
                } else if !schema.additional_properties.unwrap_or(true) {
                    return Err(ValidationError::AdditionalPropertyNotAllowed(
                        key.clone(),
                    ));
                }
            }
        }
        
        Ok(())
    }
    
    fn validate_array(
        &self,
        arr: &[Value],
        schema: &Schema,
        path: &str,
    ) -> ValidationResult {
        // 驗證陣列長度
        if let Some(min_items) = schema.min_items {
            if arr.len() < min_items {
                return Err(ValidationError::ArrayTooShort {
                    length: arr.len(),
                    min_items,
                });
            }
        }
        
        if let Some(max_items) = schema.max_items {
            if arr.len() > max_items {
                return Err(ValidationError::ArrayTooLong {
                    length: arr.len(),
                    max_items,
                });
            }
        }
        
        // 驗證唯一性
        if schema.unique_items.unwrap_or(false) {
            let mut seen = std::collections::HashSet::new();
            for item in arr {
                let item_str = serde_json::to_string(item).unwrap();
                if !seen.insert(item_str) {
                    return Err(ValidationError::NestedError {
                        path: path.to_string(),
                        error: Box::new(ValidationError::EnumMismatch),
                    });
                }
            }
        }
        
        // 驗證陣列元素
        if let Some(items_schema) = &schema.items {
            for (i, item) in arr.iter().enumerate() {
                let new_path = format!("{}[{}]", path, i);
                self.validate_with_schema(item, items_schema, &new_path)
                    .map_err(|e| ValidationError::NestedError {
                        path: new_path,
                        error: Box::new(e),
                    })?;
            }
        }
        
        Ok(())
    }
    
    fn validate_string(
        &self,
        s: &str,
        schema: &Schema,
        _path: &str,
    ) -> ValidationResult {
        // 驗證長度
        if let Some(min_length) = schema.min_length {
            if s.len() < min_length {
                return Err(ValidationError::StringTooShort {
                    length: s.len(),
                    min_length,
                });
            }
        }
        
        if let Some(max_length) = schema.max_length {
            if s.len() > max_length {
                return Err(ValidationError::StringTooLong {
                    length: s.len(),
                    max_length,
                });
            }
        }
        
        // 驗證正則表達式
        if let Some(pattern) = &schema.pattern {
            let regex = Regex::new(pattern).map_err(|_| {
                ValidationError::PatternMismatch {
                    pattern: pattern.clone(),
                }
            })?;
            
            if !regex.is_match(s) {
                return Err(ValidationError::PatternMismatch {
                    pattern: pattern.clone(),
                });
            }
        }
        
        Ok(())
    }
    
    fn validate_number(
        &self,
        n: &serde_json::Number,
        schema: &Schema,
        _path: &str,
    ) -> ValidationResult {
        let value = n.as_f64().unwrap();
        
        // 驗證最小值
        if let Some(minimum) = schema.minimum {
            if value < minimum {
                return Err(ValidationError::BelowMinimum { value, minimum });
            }
        }
        
        if let Some(exclusive_minimum) = schema.exclusive_minimum {
            if value <= exclusive_minimum {
                return Err(ValidationError::BelowMinimum {
                    value,
                    minimum: exclusive_minimum,
                });
            }
        }
        
        // 驗證最大值
        if let Some(maximum) = schema.maximum {
            if value > maximum {
                return Err(ValidationError::AboveMaximum { value, maximum });
            }
        }
        
        if let Some(exclusive_maximum) = schema.exclusive_maximum {
            if value >= exclusive_maximum {
                return Err(ValidationError::AboveMaximum {
                    value,
                    maximum: exclusive_maximum,
                });
            }
        }
        
        Ok(())
    }
}

建立 cli (main.rs)

use std::fs;
use std::path::PathBuf;

/// Command-line entry point: `<program> <schema.json> <data.json>`.
///
/// Reads both files, parses the data as JSON, builds a validator from the
/// schema text and reports the outcome. Any failure is printed to stderr and
/// the process exits with status 1.
fn main() {
    let args: Vec<String> = std::env::args().collect();

    // Exactly two arguments are expected after the program name.
    let (schema_path, data_path) = match args.as_slice() {
        [_, schema, data] => (PathBuf::from(schema), PathBuf::from(data)),
        _ => {
            eprintln!("Usage: {} <schema.json> <data.json>", args[0]);
            std::process::exit(1);
        }
    };

    // Load the schema text.
    let schema_content = match fs::read_to_string(&schema_path) {
        Ok(text) => text,
        Err(e) => {
            eprintln!("Error reading schema file: {}", e);
            std::process::exit(1);
        }
    };

    // Load the data text.
    let data_content = match fs::read_to_string(&data_path) {
        Ok(text) => text,
        Err(e) => {
            eprintln!("Error reading data file: {}", e);
            std::process::exit(1);
        }
    };

    // Parse the data document.
    let data: serde_json::Value = match serde_json::from_str(&data_content) {
        Ok(value) => value,
        Err(e) => {
            eprintln!("Error parsing data JSON: {}", e);
            std::process::exit(1);
        }
    };

    // Build the validator from the schema document.
    let validator = match Validator::from_str(&schema_content) {
        Ok(validator) => validator,
        Err(e) => {
            eprintln!("Error parsing schema JSON: {}", e);
            std::process::exit(1);
        }
    };

    // Run the validation and report the result.
    if let Err(e) = validator.validate(&data) {
        eprintln!("✗ Validation failed:");
        eprintln!("{}", e);
        std::process::exit(1);
    }

    println!("✓ Validation successful!");
    println!("Data conforms to the schema.");
}

開始使用

一樣先建立一些假資料

user_schema.json

{
  "type": "object",
  "properties": {
    "username": {
      "type": "string",
      "minLength": 3,
      "maxLength": 20,
      "pattern": "^[a-zA-Z0-9_]+$"
    },
    "email": {
      "type": "string",
      "pattern": "^[^@]+@[^@]+\\.[^@]+$"
    },
    "age": {
      "type": "integer",
      "minimum": 0,
      "maximum": 150
    },
    "status": {
      "type": "string",
      "enum": ["active", "inactive", "pending"]
    },
    "tags": {
      "type": "array",
      "items": {
        "type": "string"
      },
      "minItems": 1,
      "maxItems": 10,
      "uniqueItems": true
    }
  },
  "required": ["username", "email", "age"]
}

有效驗證

valid_user.json

{
  "username": "john_doe",
  "email": "john@example.com",
  "age": 25,
  "status": "active",
  "tags": ["developer", "rust"]
}

無效驗證

invalid_user.json

{
  "username": "jo",
  "email": "invalid-email",
  "age": 200
}
# 驗證有效資料
cargo run user_schema.json valid_user.json
# 輸出: ✓ Validation successful!

# 驗證無效資料
cargo run user_schema.json invalid_user.json
# 輸出: ✗ Validation failed: ...

好的!


上一篇
Log分析器 - 解析網頁伺服器 Daily Log 並產生統計
下一篇
資料庫遷移工具 - SQL 資料庫 schema 版本管理
系列文
Rust 實戰專案集:30 個漸進式專案從工具到服務24
圖片
  熱門推薦
圖片
{{ item.channelVendor }} | {{ item.webinarstarted }} |
{{ formatDate(item.duration) }}
直播中

尚未有邦友留言

立即登入留言